
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

[Previously saved workspace restored]

> rm(list=ls())
> gc()
         used (Mb) gc trigger (Mb) max used (Mb)
Ncells 218084 11.7     460000 24.6   350000 18.7
Vcells 319634  2.5    1678972 12.9  1867149 14.3
> require(foreign)
Loading required package: foreign
> require(ineq)
Loading required package: ineq
Warning message:
package 'ineq' was built under R version 3.3.2 
> require(dplyr)
Loading required package: dplyr

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

> library(Hmisc)
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Loading required package: ggplot2

Attaching package: 'Hmisc'

The following objects are masked from 'package:dplyr':

    combine, src, summarize

The following objects are masked from 'package:base':

    format.pval, round.POSIXt, trunc.POSIXt, units

> require(corrplot)
Loading required package: corrplot
> require(cem)
Loading required package: cem
Loading required package: tcltk

How to use CEM? Type vignette("cem")

> require(BayesTree)
Loading required package: BayesTree
> require(stargazer)
Loading required package: stargazer

Please cite as: 

 Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2. http://CRAN.R-project.org/package=stargazer 

> require(texreg)
Loading required package: texreg
Version:  1.36.23
Date:     2017-03-03
Author:   Philip Leifeld (University of Glasgow)

Please cite the JSS article in your publications -- see citation("texreg").
Warning message:
package 'texreg' was built under R version 3.3.3 
> require(systemfit)
Loading required package: systemfit
Loading required package: Matrix
Loading required package: car
Loading required package: lmtest
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

> require(SemiPar)
Loading required package: SemiPar

Attaching package: 'SemiPar'

The following object is masked from 'package:car':

    spm

Warning message:
package 'SemiPar' was built under R version 3.3.3 
> setwd(paste(substr(getwd(),1,regexpr("Dropbox",getwd())[1]-1),"Dropbox/coauthors/AMT_Bisbee_Larson/bisbee_larson_replication_final",sep=""))
> source("./tools/leg2.R")
> getwd()
[1] "D:/Dropbox/coauthors/AMT_Bisbee_Larson/bisbee_larson_replication_final"
> 
> ###################################################################################
> ##
> ##  File Name: models_results.R
> ##
> ##  Input Files: final_data_clean.dta
> ##  Output Files: donate-simulations.RData        
> ##                donate-homo-simulations.RData
> ##                models_donate_pg_bivariate.pdf  (Figure 9)
> ##                models_donate_homopol_all.pdf   (Figure 10)
> ##
> ##  Purpose:  This file conducts the random cross-validation and creates the associated
> ##            figures (#9 and #10) presented in the paper. In addition, the script generates 
> ##            .RData files of the simulated results to save computing time. The function
> ##            defined at the top of the script implements the cross-validation simulations. 
> ##            Note that the cross-validations, by virtue of being random, may yield slightly
> ##            different results from the published paper, due only to the randomness of the 
> ##            sub-samples.
> ##
> ##############################################################################################
> 
> ## Creating Functions
> cv.func <- function(form,dat,model = "biv",split = "online",counter) {
+   if(model == "sur") {
+     dat2 <- dat[,which(grepl(paste(paste(unlist(sapply(1:17,function(x) as.character(form[[x]][3]))),collapse="|"),"|online|",as.character(form[[1]][2]),sep=""),colnames(dat)))]
+     dat2 <- dat2[complete.cases(dat2),]
+   } else {
+     dat2 <- dat[,which(grepl(paste(gsub("\\+|\\~","|",as.character(form)),"|ipwbart|cem_weights|online",sep=""),colnames(dat)))]
+     dat2 <- dat2[complete.cases(dat2),]
+   }
+   if(split == "online") {
+     train <- dat2 %>% filter(online == 1)
+     test <- dat2 %>% filter(online == 0)
+   } else if(split == "online2") {
+     ind <- sample(1:nrow(dat2),size = round(nrow(dat2)/2,0),replace = F)
+     tmp <- dat2[ind,]
+     train <- tmp %>% filter(online == 1)
+     test <- tmp %>% filter(online == 0)
+   } else if(split == "sim") {
+     ind <- sample(1:nrow(dat2),size = round(nrow(dat2)/2,0),replace = F)
+     train <- dat2[ind,]
+     test <- dat2[-ind,]
+   } else {
+     tmp <- dat2[which(dat2$online == 0),]
+     ind <- sample(1:nrow(tmp),size = round(nrow(tmp)/2,0),replace = F)
+     train <- tmp[ind,]
+     test <- tmp[-ind,]
+   }
+   if(model == "biv" | model == "controls") {
+     w <- rep(1,nrow(train))
+     mod <- lm(as.formula(form),data = train)
+   } else if(model == "ipw") {
+     w <- train$ipwbart
+     mod <- lm(as.formula(form),dat = train,weights = w)
+   } else if(model == "cem") {
+     w <- train$cem_weights
+     mod <- lm(as.formula(form),dat = train,weights = w)
+   } else if(model == "sur") {
+     w <- rep(1,nrow(train))
+     mod <- systemfit(form,data=train,method = "SUR")
+   }
+   res <- predict(mod,newdata = test)
+   if(model == "sur") {
+     n <- nrow(res)
+     
+     ll <- mean(sapply(1:ncol(res),function(x) 0.5 * (sum(log(w)) - n * (log(2 * pi) + 1 - log(n) + log(sum(w * res[,x]^2,na.rm=T))))))
+     df.ll <- mod$rank + 1
+     bic <- -2 * ll + log(n) * df.ll
+     
+     bias <- mean(sapply(1:ncol(res),function(x) mean(res[,x],na.rm=T)))
+     biassq <- mean(sapply(1:ncol(res),function(x) mean(res[,x]^2,na.rm=T)))
+     rmse <- sqrt(biassq)
+     sd <- mean(sapply(1:ncol(res),function(x) sd(res[,x],na.rm=T)))
+   } else {
+     n <- length(res)
+     
+     ll <- 0.5 * (sum(log(w) * is.finite(log(w)),na.rm=T) - n * (log(2 * pi) + 1 - log(n) + log(sum(w * res^2,na.rm=T))))
+     df.ll <- mod$rank + 1
+     bic <- -2 * ll + log(n) * df.ll
+     
+     bias <- mean(res,na.rm=T)
+     biassq <- mean(res^2,na.rm=T)
+     rmse <- sqrt(biassq)
+     sd <- sd(res,na.rm=T)
+   }
+   #cat(paste(round(counter/1000,2),"\n"))
+   return(list(bic = bic,bias = bias,biassq = biassq,rmse = rmse,sd = sd))
+ }
> 
> dat <- read.dta("./tools/final_data_clean.dta")
> 
> ###############################################################################################
> # Cross validation analysis: preparing the data
> ###############################################################################################
> ties <- c("clique","weakest","weak","strong","strongest")
> mod.results <- list(NA)
> j = 1
> for(mods in c("biv","controls","ipw","cem")) {#},"sur")) {
+   i = 1
+   raw.sims <- list(NA)
+   for(tie in ties) {
+     if(mods == "biv" | mods == "ipw" | mods == "cem") {
+       form <- paste("std_donate_",tie,"~std_personalgain_",tie,sep="")
+     } else if(mods == "controls") {
+       form <- paste("std_donate_",tie,"~std_personalgain_",tie,"+",paste(colnames(dat)[which(grepl("^dem2_",colnames(dat)))],collapse="+"),sep="")
+     } else if(mods == "sur") {
+       controls <- colnames(dat)[which(grepl(tie,colnames(dat)))][which(grepl("^std_",colnames(dat)[which(grepl(tie,colnames(dat)))]))]
+       controls <- controls[-which(grepl("std_tie_synth|std_donate_",controls))]
+       if(tie == "weak" | tie == "strong") {
+         controls <- controls[-which(grepl("est",controls))]
+       }
+       form <- as.list(paste("std_donate_",tie,"~",controls,sep=""))
+       form <- sapply(1:17,function(x) as.formula(form[[x]][1]))
+     }
+     raw.sims[[i]] <- sapply(1:1000,function(x) cv.func(form,dat,model=mods,split="sim",counter = x))
+     i = i+1
+   }
+   mod.results[[j]] <- raw.sims
+   j = j+1
+ }
There were 50 or more warnings (use warnings() to see the first 50)
> 
> save(mod.results,file = "./models/donate-simulations.RData")
> 
> ties <- c("clique","weakest","weak","strong","strongest")
> homo.results <- list(NA)
> j = 1
> for(mods in c("biv","controls","ipw","cem")) {#},"sur")) {
+   i = 1
+   raw.sims <- list(NA)
+   for(tie in ties) {
+     if(mods == "biv" | mods == "ipw" | mods == "cem") {
+       form <- paste("std_donate_",tie,"~std_homo_pol_",tie,sep="")
+     } else if(mods == "controls") {
+       form <- paste("std_donate_",tie,"~std_homo_pol_",tie,"+",paste(colnames(dat)[which(grepl("^dem2_",colnames(dat)))],collapse="+"),sep="")
+     } else if(mods == "sur") {
+       controls <- colnames(dat)[which(grepl(tie,colnames(dat)))][which(grepl("^std_",colnames(dat)[which(grepl(tie,colnames(dat)))]))]
+       controls <- controls[-which(grepl("std_tie_synth|std_donate_",controls))]
+       if(tie == "weak" | tie == "strong") {
+         controls <- controls[-which(grepl("est",controls))]
+       }
+       tmp <- as.list(paste("std_donate_",tie,"~",controls,sep=""))
+       form <- sapply(1:17,function(x) as.formula(tmp[[x]][1]))
+     }
+     raw.sims[[i]] <- sapply(1:1000,function(x) cv.func(form,dat,model=mods,split="sim",counter = x))
+     i = i+1
+   }
+   homo.results[[j]] <- raw.sims
+   j = j+1
+ }
There were 50 or more warnings (use warnings() to see the first 50)
> 
> save(homo.results,file = "./models/donate-homo-simulations.RData")
> 
> 
> 
> 
> ###############################################################################################
> # models_donate_pg_bivariate.pdf: Figure 9
> ###############################################################################################
> load("./models/donate-simulations.RData")
> 
> y.axis <- 1:5
> 
> ties <- c("clique","weakest","weak","strong","strongest")
> min.1 <- -5
> max.1 <- 4
> simpleCap <- function(x) {
+   s <- strsplit(x, " ")[[1]]
+   paste(toupper(substring(s, 1,1)), substring(s, 2),
+         sep="", collapse=" ")
+ }
> set.seed(123)
> min.1 <- -3
> max.1 <- 3
> pdf("./1_Figures/models_donate_pg_bivariate.pdf",height=7,width=7)
> par(xpd=F)
> plot(rep(0,5),y.axis,type='n',xlab="",ylab="",yaxt='n',
+      xlim=c(min.1,max.1),bty='n',ylim=c(1,6),xaxt='n',main="Bias and Variance in Out-of-Sample Predictions")
> mtext(capitalize(ties),2,at=y.axis,las=1,cex=.7,adj=0,padj=-.5)
> segments(seq(min.1+1,max.1-1,by=1),1,seq(min.1+1,max.1-1,by=1),5.75,col="grey80")
> mtext(seq(min.1+1,max.1-1,by=2),3,at=seq(min.1+1,max.1-1,by=2),cex=.7,col="grey60",line=-1.5)
> mtext("% of Sims > On/Off",3,at=2.7,line=-2.5,cex=.7)
> i = 1
> for(tie in ties) {
+   sims <- sapply(1:1000, function(x) rnorm(100,mod.results[[1]][[i]][,x]$bias,mod.results[[1]][[i]][,x]$sd))
+   bic.sims <- sapply(1:1000, function(x) mod.results[[1]][[i]][,x]$bic)
+   rmse.sims <- sapply(1:1000,function(x) mod.results[[1]][[i]][,x]$rmse)
+   absbias.sims <- sapply(1:1000,function(x) abs(mod.results[[1]][[i]][,x]$bias))
+   
+   x.full <- density(sims)$x
+   raw.full <- density(sims)$y
+   rescale.full <- (raw.full-min(raw.full))/(max(raw.full)-min(raw.full))/2.5
+   hx.full <- c(rescale.full+i)
+   hgt <- max(rescale.full)
+   
+   polygon(x.full,hx.full, col=rgb(0,0,0,.2), border = rgb(0,0,0,.2),lwd=1)
+   
+   raw.onoff <- cv.func(paste("std_donate_",tie,"~std_personalgain_",tie,sep=""),dat,model="biv",split="online",counter=1)
+   onoff <- rnorm(100000,raw.onoff$bias,raw.onoff$sd)
+   bic.onoff <- raw.onoff$bic
+   rmse.onoff <- raw.onoff$rmse
+   absbias.onoff <- abs(raw.onoff$bias)
+   
+   x.full <- density(onoff)$x
+   raw.full <- density(onoff)$y
+   rescale.full <- (raw.full-min(raw.full))/(max(raw.full)-min(raw.full))/2.5
+   hx.full <- c(rescale.full+i)
+   hgt <- max(rescale.full)
+ 
+   polygon(x.full,hx.full, col=rgb(1,1,1,.2), lty=2)
+   
+   bic.test <- mean((bic.sims - bic.onoff)>2)
+   rmse.test <- mean(rmse.sims > rmse.onoff)
+   absbias.test <- mean(absbias.sims > absbias.onoff)
+   stars.bic <- ifelse(bic.test <= .01 | bic.test >= .99,"***",ifelse(bic.test<=.05 | bic.test >= .95,"**",ifelse(bic.test<=.1 | bic.test >= .9,"*","")))
+   cols.bic <- ifelse(bic.test <= .1 | bic.test >= .9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+   stars.rmse <- ifelse(rmse.test <= .01 | rmse.test >= .99,"***",ifelse(rmse.test<=.05 | rmse.test >= .95,"**",ifelse(rmse.test<=.1 | rmse.test >= .9,"*","")))
+   cols.rmse <- ifelse(rmse.test <= .1 | rmse.test >= .9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+   stars.absbias <- ifelse(absbias.test <= .01 | absbias.test >= .99,"***",ifelse(absbias.test<=.05 | absbias.test >= .95,"**",ifelse(absbias.test<=.1 | absbias.test >= .9,"*","")))
+   cols.absbias <- ifelse(absbias.test <= .1 | absbias.test >= .9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+   
+   text(2.2,i+hgt+.1,cex = .7,adj= 0,labels = paste("BIC: ",round(bic.test,2),stars.bic,sep=""),col="black")
+   text(2.2,i+hgt,cex = .7,adj = 0,labels = paste("RMSE: ",round(rmse.test,2),stars.rmse,sep=""),col="black")
+   text(2.2,i+hgt-.1,cex = .7,adj = 0,labels = paste("Abs. Bias: ",round(absbias.test,2),stars.absbias,sep=""),col="black")
+    
+   segments(raw.onoff$bias,i,raw.onoff$bias,i+hgt,lty=2)
+   segments(mean(sims),i,mean(sims),i+hgt,lty=1)
+   i = i+1
+ }
Warning messages:
1: In w * res^2 :
  longer object length is not a multiple of shorter object length
2: In w * res^2 :
  longer object length is not a multiple of shorter object length
3: In w * res^2 :
  longer object length is not a multiple of shorter object length
4: In w * res^2 :
  longer object length is not a multiple of shorter object length
5: In w * res^2 :
  longer object length is not a multiple of shorter object length
> abline(h=y.axis)
> par(xpd=T)
> leg2("center",legend=c("Online / Offline Error","Random CV Error"),fill.lty=c(2,0),
+      inset=c(0,-.15),horiz=T,
+      fill = c("white",rgb(168,168,168,max=255,alpha=250)),cex=.9,bty='n',pt.cex=1.3,box.col="grey80",bg="white")
> dev.off()
null device 
          1 
> 
> 
> ###############################################################################################
> # models_donate_homopol_all.pdf: Figure 10
> ###############################################################################################
> load("./models/donate-homo-simulations.RData")
> 
> ties <- c("clique","weakest","weak","strong","strongest")
>   y.axis <- 1:5
> set.seed(1)
>   pdf(paste0("./1_Figures/models_donate_homopol_all.pdf"),height=5,width = 7)
>   par(xpd=F)
>   nf <- layout(matrix(c(1,2,3,4,5,6,6,6,6,6),2,5,byrow=T),widths=c(.4,1.4,1.4,1.4,1.4),heights=c(6,1))
>   #layout.show(nf)
>   par(mar=c(0,2,3,0))
>   plot(rep(0,5),y.axis,type='n',xlab="",ylab="",yaxt='n',
+        bty='n',ylim=c(1,6),xaxt='n',main="")
>   mtext(capitalize(ties),2,at=y.axis,las=1,cex=.7,adj=0,padj=-.5)
>   abline(h=y.axis)
>   par(xpd=T)
>   j = 1
>   for (mods in c("biv","controls","ipw","cem")) { #,"sur")) {
+     min.1 <- -1
+     max.1 <- 1
+     mains <- ifelse(mods == "biv","Bivariate",ifelse(mods == "controls","Controls",toupper(mods)))
+     plot(rep(0,5),y.axis,type='n',xlab="",ylab="",yaxt='n',
+          xlim=c(min.1,max.1),bty='n',ylim=c(1,6),xaxt='n',main=mains)
+     i = 1
+     for(tie in ties) {
+       gap <- (max.1 - min.1)/5
+       sims <- sapply(1:1000, function(x) rnorm(10,homo.results[[j]][[i]][,x]$bias,homo.results[[j]][[i]][,x]$sd))
+       bic.sims <- sapply(1:1000, function(x) homo.results[[j]][[i]][,x]$bic)
+       rmse.sims <- sapply(1:1000,function(x) homo.results[[j]][[i]][,x]$rmse)
+       absbias.sims <- sapply(1:1000,function(x) abs(homo.results[[j]][[i]][,x]$bias))
+       
+       x.full <- density(sims)$x
+       raw.full <- density(sims)$y
+       rescale.full <- (raw.full-min(raw.full))/(max(raw.full)-min(raw.full))/2.5
+       hx.full <- c(rescale.full+i)
+       hgt <- max(rescale.full)
+       
+       polygon(x.full,hx.full, col=rgb(0,0,0,.2), border = rgb(0,0,0,.2),lwd=1)
+       
+       if(mods == "biv" | mods == "ipw" | mods == "cem") {
+         form <- paste("std_donate_",tie,"~std_homo_pol_",tie,sep="")
+       } else if(mods == "controls") {
+         form <- paste("std_donate_",tie,"~std_homo_pol_",tie,"+",paste(colnames(dat)[which(grepl("^dem2_",colnames(dat)))],collapse="+"),sep="")
+       } else if(mods == "sur") {
+         controls <- colnames(dat)[which(grepl(tie,colnames(dat)))][which(grepl("^std_",colnames(dat)[which(grepl(tie,colnames(dat)))]))]
+         controls <- controls[-which(grepl("std_tie_synth|std_donate_",controls))]
+         if(tie == "weak" | tie == "strong") {
+           controls <- controls[-which(grepl("est",controls))]
+         }
+         tmp <- as.list(paste("std_donate_",tie,"~",controls,sep=""))
+         form <- sapply(1:17,function(x) as.formula(tmp[[x]][1]))
+       }
+       raw.onoff <- cv.func(form,dat,model=mods,split="online",counter = 1)
+       onoff <- rnorm(10000,raw.onoff$bias,raw.onoff$sd)
+       bic.onoff <- raw.onoff$bic
+       rmse.onoff <- raw.onoff$rmse
+       absbias.onoff <- abs(raw.onoff$bias)
+       
+       x.full <- density(onoff)$x
+       raw.full <- density(onoff)$y
+       rescale.full <- (raw.full-min(raw.full))/(max(raw.full)-min(raw.full))/2.5
+       hx.full <- c(rescale.full+i)
+       hgt <- max(rescale.full)
+       
+       polygon(x.full,hx.full, col=rgb(1,1,1,.2), lty=2)
+       
+       bic.test <- mean((bic.sims - bic.onoff)>2)
+       rmse.test <- mean(rmse.sims > rmse.onoff)
+       absbias.test <- mean(absbias.sims > absbias.onoff)
+       
+       stars.rmse <- ifelse(rmse.test <= .01 | rmse.test >= .99,"***",ifelse(rmse.test <= .05 | rmse.test >= .95,"**",ifelse(rmse.test <= .1 | rmse.test >= .9,"*","")))
+       cols.rmse <- ifelse(rmse.test <=.1 | rmse.test >=.9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+       stars.absbias <- ifelse(absbias.test <=.01 | absbias.test >=.99,"***",ifelse(absbias.test <=.05 | absbias.test >=.95,"**",ifelse(absbias.test <=.1 | absbias.test >=.9,"*","")))
+       cols.absbias <- ifelse(absbias.test <=.1 | absbias.test >=.9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+       stars.bic <- ifelse(bic.test <=.01 | bic.test >=.99,"***",ifelse(bic.test <=.05 | bic.test >=.95,"**",ifelse(bic.test <=.1 | bic.test >=.9,"*","")))
+       cols.bic <- ifelse(bic.test <=.1 | bic.test >=.9,rgb(0,0,0,.7),rgb(0,0,0,.2))
+       
+       text(0,i+hgt+.1,cex = .7,adj = 0,labels = paste("BIC: ",round(bic.test,2),stars.bic,sep=""),pos = 4,offset = 1,col = "black")
+       text(0,i+hgt,cex = .7,adj = 0,labels = paste("RMSE: ",round(rmse.test,2),stars.rmse,sep=""),pos = 4,offset = 1,col = "black")
+       #text(0,i+hgt-.1,cex = .7,adj = 0,labels = paste("Abs. Bias: ",round(absbias.test,2),stars.absbias,sep=""),pos = 4,offset = 1,col = cols.absbias)
+       # 
+       segments(raw.onoff$bias,i,raw.onoff$bias,i+hgt,lty=2)
+       segments(mean(sims),i,mean(sims),i+hgt,lty=1)
+       i = i+1
+     }
+     abline(h=y.axis)
+     segments(0,1,0,5.75,col=rgb(0,0,0,.2))
+     text(0,6,labels = 0,col=rgb(0,0,0,.2))
+     j = j+1
+   }
There were 25 warnings (use warnings() to see them)
>   par(xpd=T)
>   par(mar=c(0,0,0,0))
>   plot(0,0,type='n',xlab="",ylab="",xaxt='n',yaxt='n',bty='n')
>   leg2("center",legend=c("Online / Offline Error","Random CV Error"),fill.lty=c(2,0),
+        inset=c(0,-.15),horiz=T,
+        fill = c("white",rgb(168,168,168,max=255,alpha=250)),cex=1,bty='n',pt.cex=1.3,box.col="grey80",bg="white")
>   dev.off()
null device 
          1 
>   
> 
> proc.time()
   user  system elapsed 
 102.70    1.00  104.61 
